{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7da218f0",
   "metadata": {},
   "source": [
    "### Data Quality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d72c6e6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdf9948d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the sales data; the relative path is resolved against the current working directory\n",
    "df = pd.read_csv(filepath_or_buffer='data/sales_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0977cba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate total sales (naive version: the 'sales' column is summed as-is, without validation)\n",
    "total_sales = df.loc[:, 'sales'].sum()"
   ]
  },
  },
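  {
   "cell_type": "markdown",
   "id": "9fd98cdc",
   "metadata": {},
   "source": [
    "Improve the code to avoid the pitfalls of assuming data quality: convert `sales` to numeric before summing, so invalid values cannot break or distort the total."
   ]
  },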
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f656f41",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert 'sales' to numeric; errors='coerce' turns values that cannot be parsed into NaN\n",
    "sales_numeric = pd.to_numeric(df['sales'], errors='coerce')\n",
    "\n",
    "# Count values that were present but unparseable, so the coercion is not silent:\n",
    "# .sum() skips NaN by default, so these rows would otherwise vanish from the total unnoticed\n",
    "invalid_sales_count = int((sales_numeric.isna() & df['sales'].notna()).sum())\n",
    "print(f'{invalid_sales_count} sales value(s) could not be parsed and were set to NaN')\n",
    "\n",
    "df['sales'] = sales_numeric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6773f033",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate total sales; skipna=True (the default) leaves NaN values out of the sum\n",
    "total_sales = df['sales'].sum(skipna=True)"
   ]
  },
  },
  {
   "cell_type": "markdown",
   "id": "ab41c335",
   "metadata": {},
   "source": [
    "### Implementing Logging in Python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "edd5242f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import Built-In Logging Module in Python\n",
    "import logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a6b65d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a logger (getLogger returns the same logger object every time it is called with this name)\n",
    "logger = logging.getLogger('etl_logger')\n",
    "\n",
    "# Set the level of the logger (DEBUG, INFO, WARNING, ERROR, CRITICAL)\n",
    "logger.setLevel(logging.INFO)\n",
    "\n",
    "# Attach the file handler only once: because the logger object is shared, re-running\n",
    "# this cell would otherwise add another handler and write every message multiple times\n",
    "if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):\n",
    "\n",
    "    # Create a file handler to log messages to a file\n",
    "    file_handler = logging.FileHandler('etl_log.log')\n",
    "\n",
    "    # Create a formatter\n",
    "    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n",
    "\n",
    "    # Add the formatter to the handler\n",
    "    file_handler.setFormatter(formatter)\n",
    "\n",
    "    # Add the handler to the logger\n",
    "    logger.addHandler(file_handler)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5478668",
   "metadata": {},
   "outputs": [],
   "source": [
    "# An example of using the logger\n",
    "def etl_process():\n",
    "    \"\"\"Log the start and the outcome of the ETL process.\n",
    "\n",
    "    An exception raised inside the try block is logged with its traceback\n",
    "    and is not re-raised.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        logger.info('Starting ETL process')\n",
    "\n",
    "        # Your ETL code here, for example:\n",
    "        #   extract()\n",
    "        #   transform()\n",
    "        #   load()\n",
    "\n",
    "        logger.info('ETL process completed successfully')\n",
    "\n",
    "    except Exception:\n",
    "        # logger.exception logs at ERROR level and includes the traceback\n",
    "        logger.exception('ETL process failed')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "802aadc8",
   "metadata": {},
   "source": [
    "### Checkpoint for Recovery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f92b1018",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample Extract Logging\n",
    "def extract():\n",
    "    \"\"\"Log a message before and after the (placeholder) extraction step.\"\"\"\n",
    "    logger.info('Starting extraction')\n",
    "    # extraction code here...\n",
    "    logger.info('Extraction completed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "993d5820",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample Transform Logging\n",
    "def transform():\n",
    "    \"\"\"Log a message before and after the (placeholder) transformation step.\"\"\"\n",
    "    logger.info('Starting transformation')\n",
    "    # transformation code here...\n",
    "    logger.info('Transformation completed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afc3eca2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample Load Logging\n",
    "def load():\n",
    "    \"\"\"Log a message before and after the (placeholder) load step.\"\"\"\n",
    "    logger.info('Starting load')\n",
    "    # loading code here...\n",
    "    logger.info('Load completed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "277473c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample ETL with Logging\n",
    "def etl_process():\n",
    "    \"\"\"Run extract, transform and load in order and log the overall outcome.\n",
    "\n",
    "    An exception raised by any step is logged with its traceback and is\n",
    "    not re-raised; the steps after the failing one are skipped.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        extract()\n",
    "        transform()\n",
    "        load()\n",
    "        logger.info('ETL process completed successfully')\n",
    "    except Exception:\n",
    "        # logger.exception logs at ERROR level and includes the traceback\n",
    "        logger.exception('ETL process failed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "516daea8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the ETL process; progress and failures are reported through the logger\n",
    "etl_process()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2506b814",
   "metadata": {},
   "source": [
    "### Avoiding Single Points of Failure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8f73d01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source 1 is the primary source\n",
    "def extract_from_source1():\n",
    "    \"\"\"Placeholder for extraction from the primary source; currently does nothing.\"\"\"\n",
    "    # Your extraction code here...\n",
    "    return None\n",
    "\n",
    "# Source 2 is the redundant source\n",
    "def extract_from_source2():\n",
    "    \"\"\"Placeholder for extraction from the redundant source; currently does nothing.\"\"\"\n",
    "    # Your extraction code here...\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "821df86a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract():\n",
    "    \"\"\"Extract from Source 1, falling back to Source 2 if Source 1 fails.\n",
    "\n",
    "    Returns:\n",
    "        Whatever the source function that succeeded returned, so the caller\n",
    "        can use the extracted data instead of having it discarded.\n",
    "\n",
    "    Raises:\n",
    "        Exception: the error from Source 2, re-raised when both sources fail.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        logger.info('Starting extraction from Source 1')\n",
    "        extracted = extract_from_source1()\n",
    "        logger.info('Extraction from Source 1 completed')\n",
    "        return extracted\n",
    "\n",
    "    except Exception:\n",
    "        logger.error('Failed to extract from Source 1', exc_info=True)\n",
    "\n",
    "        # The fallback stays inside the handler so that a Source 2 failure\n",
    "        # remains chained to the Source 1 error in the traceback\n",
    "        try:\n",
    "            logger.info('Starting extraction from Source 2')\n",
    "            extracted = extract_from_source2()\n",
    "            logger.info('Extraction from Source 2 completed')\n",
    "            return extracted\n",
    "\n",
    "        except Exception:\n",
    "            logger.error('Failed to extract from Source 2', exc_info=True)\n",
    "            raise"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
